package com.cmge.ad.spider.pic.meinv;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.net.URL;
import java.net.URLConnection;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.List;

import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.processor.PageProcessor;

import com.cmge.ad.util.HttpClientUtil;
import com.cmge.ad.util.JsonUtil;
import com.cmge.ad.util.SuprUtil;

/**
 * @desc	Baidu image channel crawler.
 * 
 * 			http://image.baidu.com/channel?c=美女
 * 			http://image.baidu.com/channel?c=%E7%BE%8E%E5%A5%B3
 * 			http://image.baidu.com/data/imgs?col=%E7%BE%8E%E5%A5%B3&tag=%E5%85%A8%E9%83%A8&sort=0&tag3=&pn=0&rn=60&p=channel&from=1
 * 			About 30000 images; 60 per page, 500 pages.
 * 
 * 
 * 			Detail page: http://image.baidu.com/search/detail?word=%E4%BF%A1%E6%81%AF%E5%9B%BE%E8%A1%A8&tn=baiduimagedetail&os=182542327%2C200638614
 * 			The "os" parameter comes from the "os" value returned by the API.
 * 			API: http://image.baidu.com/search/acjson?tn=resultjson_com&ipn=rj&word=%E4%BF%A1%E6%81%AF%E5%9B%BE%E8%A1%A8&pn=120&rn=60
 * 
 * 			Two entry points feed the same download queue: {@link #main(String[])} pages through the
 * 			JSON API and queues every download URL, while {@link #process(Page)} (used when the class
 * 			is run as a webmagic PageProcessor) queues the image URL found on a detail page.
 * 			A background thread drains the queue and saves the pictures under E:/baiduPic/&lt;keyword&gt;.
 * @author	ljt
 * @time	2014-12-30 下午7:51:34
 */
public class BaiduImagChannelCrawl implements PageProcessor {

    /** Images requested per API call; also the step of the {@code pn} offset parameter. */
    private static final int PAGE_SIZE = 24;

    private Site site = Site.me().setRetryTimes(3).setSleepTime(100);

    /** Index of the next API page that main() will request. */
    private static int pageNum = 0;

    /** Last page index (inclusive) that main() requests. */
    private static int maxPageNumber = 2;

    /** Search keyword; also the name of the local download sub-directory. */
    private static String keyWord;

    /** Detail-page URL prefix (the "os" value is appended); only referenced by disabled code. */
    private static String detailUrl;

    /** API URL prefix; the {@code pn} offset is appended per request. */
    private static String url;

    /** Only referenced by the disabled detail-page code in process(). */
    private static List<BaiduImage> resultList = new ArrayList<BaiduImage>();

    /** True until this processor has started the download thread. */
    private boolean flag = true;

    /**
     * Queue of picture URLs waiting to be downloaded. It is appended to by the crawling code and
     * read by the download thread, so every access goes through {@link #enqueue(String)} /
     * {@link #pictureAt(int)}, which synchronize on the list. Never contains null.
     */
    private static final List<String> crawlPicList = new ArrayList<String>();

    static {
        keyWord = "壁纸";
        url = "http://image.baidu.com/data/imgs?rn=" + PAGE_SIZE + "&col=%E5%A3%81%E7%BA%B8"
                + "&tag=%E5%85%A8%E9%83%A8"
                + "&tag3="
                + "&width=1366"
                + "&height=768"
                + "&ic=0"
                + "&ie=utf8"
                + "&oe=utf-8"
                + "&image_id="
                + "&fr=channel"
                + "&p=channel"
                + "&from=1"
                + "&app=img.browse.channel.wallpaper"
                + "&pn=";
        try {
            detailUrl = "http://image.baidu.com/search/detail?word=" + URLEncoder.encode(keyWord, "utf-8") + "&tn=baiduimagedetail&os=";
        } catch (UnsupportedEncodingException e) {
            // Fail loudly instead of silently leaving detailUrl null.
            throw new IllegalStateException("utf-8 encoding is not available", e);
        }
    }

    /**
     * Extracts the source picture URL from a detail page and queues it for download.
     * The first call on this instance also starts the background download thread.
     *
     * @param page the fetched detail page
     */
    @Override
    public void process(Page page) {
        String picUrl = null;
        try {
            // get() yields null when the XPath matches nothing; check instead of relying on an NPE.
            Object src = page.getHtml().xpath("//div[@id='srcPic']//img/@src").get();
            if (src != null) {
                picUrl = src.toString();
            }
        } catch (Exception e) {
            // Best effort: a page that cannot be parsed is skipped.
            e.printStackTrace();
        }

        System.out.println(picUrl);
        // enqueue() ignores null, so a page without a picture can no longer stall the downloader.
        enqueue(picUrl);

        if (flag) {
            flag = false;
            startThread();
//			for(BaiduImage image : resultList){
//				String url = detailUrl + image.getOs();
//				page.addTargetRequest(url);
//    		}
        }
    }

    @Override
    public Site getSite() {
        return site;
    }

    /**
     * Pages through the channel API (page indexes {@code pageNum..maxPageNumber}, inclusive),
     * queues every returned download URL, prints the queue size and starts the download thread.
     * Paging stops early when the API returns nothing parseable or an empty image list.
     *
     * @param args unused
     * @throws Exception whatever the HTTP/JSON helpers throw
     */
    public static void main(String[] args) throws Exception {

        // Call the API page by page.
        while (pageNum <= maxPageNumber) {
            BaiduImageResult temp = (BaiduImageResult) JsonUtil.getGson().fromJson(HttpClientUtil.getJson(url + (pageNum * PAGE_SIZE)), BaiduImageResult.class);
            if (temp == null || SuprUtil.isEmptyCollection(temp.getImgs())) {
                break;
            }
            for (BaiduImage bi : temp.getImgs()) {
                // Entries may be null or lack a download URL; those are skipped.
                if (bi != null) {
                    enqueue(bi.getDownloadUrl());
                }
            }
            pageNum++;
        }

        System.out.println(crawlPicList.size());

        startThread();

//    	Spider qsSpider = Spider.create(new BaiduImagChannelCrawl())
//    					.addUrl("http://image.baidu.com/search/detail?z=0&ipn=false&ie=utf-8&oe=utf-8&sme=&cg=wallpaper&gsm=f0&pn=1&word="+keyWord+"&tn=baiduimagedetail&os=2685345409%2C683453902")
////    					.addPipeline(new RedisPipeline())
////    					.addPipeline(new JsonFilePipeline())
////    					.addPipeline(new JsonPipeline())
////    					.addPipeline(new MysqlPicturePipeline())
//    					.thread(1);
//    	qsSpider.start();

    }

    /**
     * Appends a picture URL to the download queue.
     *
     * @param picUrl URL to queue; null is ignored
     */
    private static void enqueue(String picUrl) {
        if (picUrl == null) {
            return;
        }
        synchronized (crawlPicList) {
            crawlPicList.add(picUrl);
        }
    }

    /**
     * Returns the queued URL at the given position.
     *
     * @param index zero-based queue position
     * @return the URL, or null when nothing has been queued at that position yet
     */
    private static String pictureAt(int index) {
        synchronized (crawlPicList) {
            return index < crawlPicList.size() ? crawlPicList.get(index) : null;
        }
    }

    /**
     * Starts a thread that downloads the queued pictures in order. When it has caught up with
     * the queue it polls once per second for new entries; it only stops when interrupted.
     */
    private static void startThread() {

        Thread t = new Thread(new Runnable() {

            /** Queue position of the next picture to download. */
            int nextIndex = 0;

            @Override
            public void run() {
                while (true) {
                    String picUrl = pictureAt(nextIndex);
                    if (picUrl != null) {
                        // Save the picture locally, then move on even if the download failed.
                        downloadPicture(picUrl);
                        nextIndex++;
                    } else {
                        try {
                            Thread.sleep(1000);
                        } catch (InterruptedException e) {
                            // Restore the flag and stop; continuing would make sleep() fail forever.
                            Thread.currentThread().interrupt();
                            return;
                        }
                    }
                }
            }
        });

        t.start();

    }

    /**
     * Downloads one picture into E:/baiduPic/&lt;keyword&gt;/, named after the upper-cased last path
     * segment of the URL. I/O failures are reported on stderr and otherwise ignored.
     *
     * @param picUrl URL of the picture to download
     */
    private static void downloadPicture(String picUrl) {
        System.out.println("正在下载："+picUrl);

        InputStream inStream = null;
        FileOutputStream fs = null;
        try {
            URLConnection conn = new URL(picUrl).openConnection();
            conn.setReadTimeout(1000);
            conn.setConnectTimeout(1000);
            inStream = conn.getInputStream();

            String fileName = picUrl.substring(picUrl.lastIndexOf("/") + 1).toUpperCase();
            File dir = new File("E:/baiduPic/"+keyWord);
            if (!dir.exists()) {
                // mkdirs() also creates E:/baiduPic itself when it is missing.
                dir.mkdirs();
            }
            fs = new FileOutputStream("E:/baiduPic/"+keyWord+"/"+fileName);

            byte[] buffer = new byte[1204];
            int byteread;
            while ((byteread = inStream.read(buffer)) != -1) {
                fs.write(buffer, 0, byteread);
            }
        } catch (IOException e) {
            // Also covers FileNotFoundException and MalformedURLException.
            e.printStackTrace();
        } finally {
            // Always release both streams, whether or not the copy succeeded.
            close(fs);
            close(inStream);
        }
    }

    /**
     * Closes a stream, reporting (but not propagating) a failure.
     *
     * @param stream stream to close; may be null
     */
    private static void close(java.io.Closeable stream) {
        if (stream == null) {
            return;
        }
        try {
            stream.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /** JSON mapping of an API response: the list of images on one page. */
    static class BaiduImageResult {
        private ArrayList<BaiduImage> imgs;

        public ArrayList<BaiduImage> getImgs() {
            return imgs;
        }

        public void setImgs(ArrayList<BaiduImage> imgs) {
            this.imgs = imgs;
        }

    }

    /** JSON mapping of a single image entry; only the download URL is used. */
    static class BaiduImage {
        private String downloadUrl;

        public String getDownloadUrl() {
            return downloadUrl;
        }

        public void setDownloadUrl(String downloadUrl) {
            this.downloadUrl = downloadUrl;
        }
    }
}
